Required packages

We need the following packages:

required_packages <- c("readxl",
                       "purrr",
                       "dplyr",
                       "stringr",
                       "tidyr",
                       "AMR",
                       "magrittr")

Installing those that are not already installed on the system:

for (package in required_packages) {
  if (!package %in% installed.packages()) install.packages(package)
}

Loading the required packages:

devnull <- lapply(required_packages, require, character.only = TRUE)

Loading and fixing the data

Antimicrobial use

Date and type of antimicrobial use per farm:

amu <- read_excel("AMU_KietStudy_Marc.xlsx")

It is a data frame that looks like this:

amu
## # A tibble: 90 × 5
##    FarmID AgeUse_Day DurationOfUse_days ProductNo AAI             
##    <chr>       <dbl>              <dbl>     <dbl> <chr>           
##  1 K06            30                  4         1 sulfadimidine   
##  2 K06            30                  4         1 sulfaquinoxaline
##  3 K06            35                  3         2 doxycyline      
##  4 K06            35                  3         2 tylosin         
##  5 K07             1                  4         1 oxytetracyline  
##  6 K07             1                  4         1 colistin        
##  7 K07            36                  1         2 ampicillin      
##  8 K07            36                  1         2 colistin        
##  9 K07            38                  3         3 flumequine      
## 10 K07            38                  3         4 enrofloxacin    
## # … with 80 more rows

Antimicrobial classes

Antimicrobial class of the antimicrobial(s) against which each of the resistance genes is active:

classes <- read_excel("FarmvsClass.xlsx")

It’s a data frame that looks like this:

classes
## # A tibble: 81 × 2
##    EvaGreen_Name Antimicrobial_Class
##    <chr>         <chr>              
##  1 aac3-liacde   Aminoglycoside     
##  2 aac6-aph2     Aminoglycoside     
##  3 aac6-lb       Aminoglycoside     
##  4 aac6-li       Aminoglycoside     
##  5 aac6-lla      Aminoglycoside     
##  6 aadA          Aminoglycoside     
##  7 aadE          Aminoglycoside     
##  8 aadE-like     Aminoglycoside     
##  9 acrA          Multidrug          
## 10 acrB          Multidrug          
## # … with 71 more rows

Note that each resistance gene can be active against antimicrobials of more than one antimicrobial class.

Resistance genes concentrations

The names of the farms:

(farms <- paste0("K", c(formatC(6:9, 1, flag = "0"), 11:26)))
##  [1] "K06" "K07" "K08" "K09" "K11" "K12" "K13" "K14" "K15" "K16" "K17" "K18"
## [13] "K19" "K20" "K21" "K22" "K23" "K24" "K25" "K26"

Let’s start by looking at the resistance genes concentrations in chicken only (maybe we’ll look at the rat data later on):

genes <- farms %>%
  map(read_excel, path = "KietAnalysisData.xlsx") %>% 
  map(filter, Group.1 %in% c("Chicken-Control", "Chicken-Treatment")) %>% 
  map(select, -Group.1, -TotalLog2Value) %>% 
  setNames(farms)

In the excel file, the gene concentrations for a given farm (with control and treatment experiments) are in a different tab. This structure is mapped in the genes object that is a list of data frames, each of them containing the genes concentrations for a given farm. As an example the data for the first farm look like this:

genes[[1]]
## # A tibble: 10 × 84
##    FarmID Group     Sample_Name SamplingDay `aac3-liacde` `aac6-aph2` `aac6-lb`
##    <chr>  <chr>     <chr>       <chr>               <dbl>       <dbl>     <dbl>
##  1 K06    Control   K06-BC      Before              4.74         7.42     5.75 
##  2 K06    Control   K06-07C     7                   1.96         5.55     2.21 
##  3 K06    Control   K06-14C     14                  0.205        4.55     0.138
##  4 K06    Control   K06-60C     60                  1.46         6.01     1.69 
##  5 K06    Control   K06-EC      End                 0.856        5.57     1.08 
##  6 K06    Treatment K06-B       Before              3.65         6.20     5.18 
##  7 K06    Treatment K06-07      7                   4.00         5.85     3.93 
##  8 K06    Treatment K06-14      14                  1.96         5.01     0.803
##  9 K06    Treatment K06-60      60                  5.36         3.07     5.93 
## 10 K06    Treatment K06-E       End                 5.72         5.49     4.44 
## # … with 77 more variables: `aac6-li` <dbl>, `aac6-lla` <dbl>, aadA <dbl>,
## #   aadE <dbl>, `aadE-like` <dbl>, acrA <dbl>, acrB <dbl>, acrF <dbl>,
## #   `aph2-lb` <dbl>, `aph2-lde` <dbl>, `aph3-lac` <dbl>, `aph3-lll` <dbl>,
## #   arnA <dbl>, bacA_1 <dbl>, bacA_2 <dbl>, blaAMPC <dbl>, blaCMY2 <dbl>,
## #   blaCTXM <dbl>, blaDHA <dbl>, blaPSE <dbl>, blaSHV <dbl>, blaTEM <dbl>,
## #   blaZ <dbl>, cat <dbl>, cblA <dbl>, cepA <dbl>, cepA2 <dbl>, cfr <dbl>,
## #   cfr2 <dbl>, cfxA <dbl>, `cmlA1-01` <dbl>, cmr <dbl>, dfrA <dbl>, …

Checking and fixing the columns names consistency

Let’s check that the names of the columns are the same for all the farms:

genes %>%
  map_df(names) %>% 
  apply(1, unique) %>% 
  .[map_int(., length) > 1]
## [[1]]
## [1] "FarmID" "K09"

There is a problem with with the FarmID variable that is called K09 is one of the farms. Let’s fix this. As the names of the variables seem OK in the first farm:

(correct_names <- names(genes[[1]]))
##  [1] "FarmID"      "Group"       "Sample_Name" "SamplingDay" "aac3-liacde"
##  [6] "aac6-aph2"   "aac6-lb"     "aac6-li"     "aac6-lla"    "aadA"       
## [11] "aadE"        "aadE-like"   "acrA"        "acrB"        "acrF"       
## [16] "aph2-lb"     "aph2-lde"    "aph3-lac"    "aph3-lll"    "arnA"       
## [21] "bacA_1"      "bacA_2"      "blaAMPC"     "blaCMY2"     "blaCTXM"    
## [26] "blaDHA"      "blaPSE"      "blaSHV"      "blaTEM"      "blaZ"       
## [31] "cat"         "cblA"        "cepA"        "cepA2"       "cfr"        
## [36] "cfr2"        "cfxA"        "cmlA1-01"    "cmr"         "dfrA"       
## [41] "dfrF"        "ermA"        "ermB"        "ermC"        "floR"       
## [46] "folA"        "fosB"        "fox5"        "macB"        "mcr-1"      
## [51] "mcr-2"       "mcr-3"       "mdtF"        "mdtL"        "mdtO"       
## [56] "mecA"        "mefa10"      "mefA3"       "mfsA"        "oprD"       
## [61] "oqxA"        "oqxB"        "qacA"        "qacC"        "qacE"       
## [66] "qnrA"        "qnrB"        "qnrS"        "sat4"        "spc"        
## [71] "strB"        "sul1"        "sul2"        "sul3"        "tetB"       
## [76] "tetC-01"     "tetM"        "tetO"        "tetQ"        "tetW"       
## [81] "tolC"        "vanA"        "vanB"        "vatA"

Let’s use them as variables names for all the farms:

genes %<>% map(setNames, correct_names)

Dealing with missing “Before” measurements

Note also that not all farms have a “Before” measurement in the control group:

(tmp <- genes %>%
  map(~ .x %>% filter(SamplingDay == "Before") %>% select(Group)) %>% 
  .[map_int(., nrow) < 2] %>% 
  unlist())
##   K14.Group   K15.Group   K16.Group   K17.Group   K18.Group   K20.Group 
## "Treatment" "Treatment" "Treatment" "Treatment" "Treatment" "Treatment" 
##   K21.Group   K22.Group   K23.Group 
## "Treatment" "Treatment" "Treatment"

For the farms where the “Before” measurement is missing in the control group, let’s just use the “Before” measurement of the treatment group. For that, we need the following function where x is a data frame for one farm:

control_before <- function(x) {
  y <- filter(x, SamplingDay == "Before")
  y$Group <- "Control"
  y$Sample_Name <- NA
  rbind(x, y)
}

Let’s now use this function on all the farms that needed to be fixed:

# the names of the farms that need to be fixed:
farms_to_fix <- tmp %>% 
  names() %>% 
  str_remove(".Group")

# the fixed data for these farms:
fixed_farms <- genes %>% 
  extract(farms_to_fix) %>% 
  map(control_before)

# updating the original data with the fixed data:
genes[farms_to_fix] <- fixed_farms

Preparing the data for analyses

Computing sums of resistance genes

Here we compute aggregates of resistance genes, these aggregates being the sums of all the resistance genes but also the sum of all the resistance genes by class of antimicrobial against which they are effective. Indeed, we will perform the analyses in 3 different ways: per resistant gene, for all resistant genes altogether, and per class of antimicrobials against which the resistant genes are active.

Let’s start by retrieving the names of resistance genes:

(resistance_genes <- setdiff(names(genes[[1]]),
                             c("FarmID", "Group", "Sample_Name", "SamplingDay")))
##  [1] "aac3-liacde" "aac6-aph2"   "aac6-lb"     "aac6-li"     "aac6-lla"   
##  [6] "aadA"        "aadE"        "aadE-like"   "acrA"        "acrB"       
## [11] "acrF"        "aph2-lb"     "aph2-lde"    "aph3-lac"    "aph3-lll"   
## [16] "arnA"        "bacA_1"      "bacA_2"      "blaAMPC"     "blaCMY2"    
## [21] "blaCTXM"     "blaDHA"      "blaPSE"      "blaSHV"      "blaTEM"     
## [26] "blaZ"        "cat"         "cblA"        "cepA"        "cepA2"      
## [31] "cfr"         "cfr2"        "cfxA"        "cmlA1-01"    "cmr"        
## [36] "dfrA"        "dfrF"        "ermA"        "ermB"        "ermC"       
## [41] "floR"        "folA"        "fosB"        "fox5"        "macB"       
## [46] "mcr-1"       "mcr-2"       "mcr-3"       "mdtF"        "mdtL"       
## [51] "mdtO"        "mecA"        "mefa10"      "mefA3"       "mfsA"       
## [56] "oprD"        "oqxA"        "oqxB"        "qacA"        "qacC"       
## [61] "qacE"        "qnrA"        "qnrB"        "qnrS"        "sat4"       
## [66] "spc"         "strB"        "sul1"        "sul2"        "sul3"       
## [71] "tetB"        "tetC-01"     "tetM"        "tetO"        "tetQ"       
## [76] "tetW"        "tolC"        "vanA"        "vanB"        "vatA"

As a reminder classes looks like this:

classes
## # A tibble: 81 × 2
##    EvaGreen_Name Antimicrobial_Class
##    <chr>         <chr>              
##  1 aac3-liacde   Aminoglycoside     
##  2 aac6-aph2     Aminoglycoside     
##  3 aac6-lb       Aminoglycoside     
##  4 aac6-li       Aminoglycoside     
##  5 aac6-lla      Aminoglycoside     
##  6 aadA          Aminoglycoside     
##  7 aadE          Aminoglycoside     
##  8 aadE-like     Aminoglycoside     
##  9 acrA          Multidrug          
## 10 acrB          Multidrug          
## # … with 71 more rows

Let’s add a category All to the Antimicrobial_Class variable. This category will simply correspond to all the resistance genes:

classes %<>% 
  bind_rows(data.frame(EvaGreen_Name = resistance_genes,
                       Antimicrobial_Class = "All"))

The antimicrobial classes against which each resistance gene is active now looks like:

(classes_names <- unique(classes$Antimicrobial_Class))
##  [1] "Aminoglycoside" "Multidrug"      "Polymyxin"      "Other"         
##  [5] "Beta-Lactam"    "Phenicol"       "Sulfonamide"    "MLSB"          
##  [9] "Quinolone"      "Tetracycline"   "Glycopeptide"   "All"

Note that Other and All categories are actually not antimicrobials classes per se. The following function calculates the sum of the genes concentration for a given class am_class for the data frame gene_farm of a given farm:

sum_by_class <- function(am_class, gene_farm) {
  classes %>%
    filter(Antimicrobial_Class == am_class) %>% 
    pull(EvaGreen_Name) %>% 
    extract(gene_farm, .) %>% 
    rowSums()
}

The following function uses the one above and adds as many variables as antimicrobial classes (12) to the data frame of a given farm:

add_sums_by_class <- function(gene_farm) {
  gene_farm %>%
    map_dfc(classes_names, sum_by_class, .) %>% 
    setNames(classes_names) %>% 
    bind_cols(gene_farm, .)
}

Let’s use this function to add the sums of the genes concentrations to the data frames of all the farms:

genes %<>% map(add_sums_by_class)

Antimicrobial classes for AMU

As a reminder the data on AMU looks like this:

amu
## # A tibble: 90 × 5
##    FarmID AgeUse_Day DurationOfUse_days ProductNo AAI             
##    <chr>       <dbl>              <dbl>     <dbl> <chr>           
##  1 K06            30                  4         1 sulfadimidine   
##  2 K06            30                  4         1 sulfaquinoxaline
##  3 K06            35                  3         2 doxycyline      
##  4 K06            35                  3         2 tylosin         
##  5 K07             1                  4         1 oxytetracyline  
##  6 K07             1                  4         1 colistin        
##  7 K07            36                  1         2 ampicillin      
##  8 K07            36                  1         2 colistin        
##  9 K07            38                  3         3 flumequine      
## 10 K07            38                  3         4 enrofloxacin    
## # … with 80 more rows

In order to work with AMU by antimicrobial class, we need to generate the antimicrobial class of each of the antimicrobial of the AAI column. For that, we can use the ab_group() function of the AMR package, but, for consistency with the classes data frame, we need to generate a hash table that provide the correspondence between the spelling of the given by the AMR::ab_group() function and the spelling used in our classes data frame:

#          AMR package:                  classes data frame:
hash <- c("Aminoglycosides"           = "Aminoglycoside",
          "Amphenicols"               = "Phenicol",
          "Antifungals/antimycotics"  = "Antifungals/antimycotics",
          "Beta-lactams/penicillins"  = "Beta-Lactam",
          "Cephalosporins (3rd gen.)" = "Beta-Lactam",
          "Macrolides/lincosamides"   = "MLSB",
          "Other antibacterials"      = "Other",
          "Polymyxins"                = "Polymyxin",
          "Quinolones"                = "Quinolone",
          "Tetracyclines"             = "Tetracycline",
          "Trimethoprims"             = "Sulfonamide")

Let’s generate the correspondence data frame:

(amu_classes <- amu %>% 
  select(AAI) %>% 
  unique() %>% 
  mutate(AMR_package = map_chr(AAI, ab_group),
         classes_df  = hash[AMR_package]))
## Warning: in `as.ab()`: these values could not be coerced to a valid antimicrobial
## ID: "bromhexine".
## # A tibble: 25 × 3
##    AAI              AMR_package               classes_df  
##    <chr>            <chr>                     <chr>       
##  1 sulfadimidine    Trimethoprims             Sulfonamide 
##  2 sulfaquinoxaline Other antibacterials      Other       
##  3 doxycyline       Tetracyclines             Tetracycline
##  4 tylosin          Macrolides/lincosamides   MLSB        
##  5 oxytetracyline   Tetracyclines             Tetracycline
##  6 colistin         Polymyxins                Polymyxin   
##  7 ampicillin       Beta-lactams/penicillins  Beta-Lactam 
##  8 flumequine       Quinolones                Quinolone   
##  9 enrofloxacin     Quinolones                Quinolone   
## 10 ceftiofur        Cephalosporins (3rd gen.) Beta-Lactam 
## # … with 15 more rows

Let’s check for missing values:

filter(amu_classes, if_any(.fns = is.na))
## # A tibble: 1 × 3
##   AAI        AMR_package classes_df
##   <chr>      <chr>       <chr>     
## 1 bromhexine <NA>        <NA>

Let’s fix it manually:

amu_classes[amu_classes$AAI == "bromhexine", "classes_df"] <- "Mucoactive agent"

Finally, we can use amu_classes to make another hash table:

hash <- with(amu_classes, setNames(classes_df, AAI))

And we can use this new hash table to add the antimicrobial class to the amu data frame:

(amu %<>% mutate(class = hash[AAI]))
## # A tibble: 90 × 6
##    FarmID AgeUse_Day DurationOfUse_days ProductNo AAI              class       
##    <chr>       <dbl>              <dbl>     <dbl> <chr>            <chr>       
##  1 K06            30                  4         1 sulfadimidine    Sulfonamide 
##  2 K06            30                  4         1 sulfaquinoxaline Other       
##  3 K06            35                  3         2 doxycyline       Tetracycline
##  4 K06            35                  3         2 tylosin          MLSB        
##  5 K07             1                  4         1 oxytetracyline   Tetracycline
##  6 K07             1                  4         1 colistin         Polymyxin   
##  7 K07            36                  1         2 ampicillin       Beta-Lactam 
##  8 K07            36                  1         2 colistin         Polymyxin   
##  9 K07            38                  3         3 flumequine       Quinolone   
## 10 K07            38                  3         4 enrofloxacin     Quinolone   
## # … with 80 more rows

Here, the categories used in the class variable of the amu data frame are consistent with the categories used in the Antimicrobial_Class of the classes data frame.

Data visualization

Plotting the raw data per farm and gene

The time points:

genes %>%
  map(pull, SamplingDay) %>% 
  unlist() %>% 
  unique()
## [1] "Before" "7"      "14"     "60"     "End"    "After"  "30"     "90"

Generating new time points and ordering the data chronologically:

genes %<>% map(mutate,
               SamplingDay2 = as.integer(recode(SamplingDay,
                                                Before = "-7",
                                                After  = "0",
                                                End    = "120"))) %>% 
  map(arrange, Group, SamplingDay2) # making sure that data are arranged chronologically

A utility function for the function plot_gene_concentration() that follows after. This function adds the dots and lines to a plot:

plot_data <- function(x, gene, col, lwd = 2, lty = 3) {
  lines2 <- function(...) lines(..., col = col, lwd = lwd)
  nrows <- nrow(x)
  x2 <- x[-c(1, nrows), ]
  x3 <- x[1:2, ]
  x4 <- x[(nrows - 1):nrows, ]
  points(x$SamplingDay2, x[[gene]], col = col, lwd = lwd)
  lines2(x2$SamplingDay2, x2[[gene]])
  lines2(x3$SamplingDay2, x3[[gene]], lty = lty)
  lines2(x4$SamplingDay2, x4[[gene]], lty = lty)
}

Another utility function for the function plot_gene_concentration() that follows after. This function plot the axes, their range and labels:

plot_frame <- function(...) {
  plot(type = "n", xlim = c(-10, 120), axes = FALSE, ...)
  ats <- c(-7, seq(0, 120, 20))
  lbs <- ats
  lbs[1] <- "before"
  lbs[length(lbs)] <- "end"
  axis(1, ats, lbs)
  axis(2)
}

The following function uses the two previous functions to plots the gene concentrations as a function of time for a given resistance gene gene in a given farm farm. And this for both control and treatment experiments:

plot_gene_concentration <- function(farm, gene, text) {
  farm_dataset <- genes[[farm]]
  
  plot_frame(farm_dataset$SamplingDay2, farm_dataset[[gene]],
             xlab = "time after treatment (days)", ylab = "gene concentration")

  farm_dataset %>%
    filter(Group == "Control") %>% 
    plot_data(gene, "blue")
  
  farm_dataset %>%
    filter(Group == "Treatment") %>% 
    plot_data(gene, "red")

  mtext(text)
  abline(v = 0, lwd = 2)
}

The text argument is a character string to use as the title of the plot. Plotting aac3-liacde for control and treatment experiments in farm K06:

plot_gene_concentration(farms[1], resistance_genes[1], resistance_genes[1])
legend("right", legend = c("treatment", "control"), col = c("red", "blue"),
       lty = 1, lwd = 2, bty = "n")

Plotting all the genes for a given farm

Number of columns we want and some graph tuning:

ncols <- 4
plt_val <- c(.13, .92, .22, .9)

Plotting all the genes for the first farm:

opar <- par(mfrow = c(ceiling(length(resistance_genes) / ncols), ncols), plt = plt_val)
walk(resistance_genes, ~ plot_gene_concentration(farms[1], .x, .x))

par(opar)

Plotting all the farms for a given gene

Plotting all the farms for the first gene:

opar <- par(mfrow = c(ceiling(length(farms) / ncols), ncols), plt   = plt_val)
walk(farms, ~ plot_gene_concentration(.x, resistance_genes[1], .x))

par(opar)

Gathering data per gene

Here we want to plot on a single graph the data from all the farms (with control and treatment experiments), with one line per experiment time series. For that, we first need to standardize the genes concentration in order to ensure that dynamics are comparable across experiments and farms. The standardization is done by taking the Before measurement as a reference value.

Standardizing the data

The following function allows to standardize the data for all the resistance genes from one given experiment (i.e. either control or treatment in one given farm):

standardize_by_before_ <- function(x) {
  rbind(rep(1, length(resistance_genes)),
        sweep(as.matrix(x[x$SamplingDay != "Before", resistance_genes]), 2,
                 unlist(x[x$SamplingDay == "Before", resistance_genes]), `/`))
}

Here x is the subset of the data frame of genes concentrations of a given farm that correspond either to the control of the treatment experiment. This function below uses the one above and standardizes the data for all the resistance genes for both the control and treatment experiments of a given farm:

standardize_by_before <- function(x) {
  x[, resistance_genes] <- rbind(standardize_by_before_(filter(x, Group == "Control")),
                                 standardize_by_before_(filter(x, Group == "Treatment")))
  
  x
}

Now here x is the data frame of genes concentrations of a given farm. Let’s use this function to standardize the gene concentrations for all the farms:

genes_standardized <- map(genes, standardize_by_before)

Let’s now plot the concentrations of a given gene across the farms on a single plot. Let’s first define the following function that draws the layout the plot:

plot_frame2 <- function(...) {
  plot_frame(..., xlab = "time (days)", ylab = "standardized genes concentrations")
}

Let’s now explore various ways to plot the data.

Experiments time series

Now, we can start exploring various option of plotting the data, starting with this function:

plot_gene_all_farms <- function(gene, infinity = -1000) {
  transformation <- function(x) {
    x[, 3] <- log10(x[, 3])
    x
  }
  
  tmp <- genes_standardized %>%
    map(extract, c("Group", "SamplingDay2", gene)) %>%
    map(transformation)

  tmp2 <- bind_rows(tmp)
  plot_frame2(tmp2[[2]], tmp2[[3]])

  tmp %>% 
    map(function(x) {x[[3]] <- replace(x[[3]], is.infinite(x[[3]]), infinity); x}) %>% 
    map(~ split(.x, .x$Group)) %>% 
    unlist(recursive = FALSE) %>% 
    map(mutate, col = c("blue", "red")[(Group == "Treatment") + 1]) %>% 
    sample() %>% # with want to shuffle the treatments for unbias visualization
    walk(~ plot_data(.x, gene, col = .x$col))
  
  mtext(gene)
  abline(h = 0, lwd = 2)
  abline(v = 0, lwd = 2)
}

Note here that we transform the standardized genes concentrations. Depending on the chosen transformation, it may generate infinity values. For visualization purpose we replace these values by the big number infinity in the list of the function’s arguments. Let’s try it on one resistance gene:

plot_gene_all_farms(resistance_genes[1])
legend("bottomright", legend = c("treatment", "control"), col = c("red", "blue"),
       lty = 1, lwd = 2, bty = "n")

Boxplots

Let’s try an alternative visualization, using this following function for box-plots:

boxplot2 <- function(x, eps, col, ...) {
  boxplot(x[[3]] ~ x[[2]], at =  unique(x[[2]]) + eps, add = TRUE, axes = FALSE,
          boxwex = 2.5, col = adjustcolor(col, .5), outline = FALSE, ...)
}

Here is the alternative visualization:

plot_gene_all_farms_boxplot <- function(gene) {
  eps <- 1.5
  
  tmp <- genes_standardized %>% 
    map(filter, SamplingDay != "Before") %>% 
    map(extract, c("Group", "SamplingDay2", gene))

  tmp2 <- bind_rows(tmp)
  plot_frame2(tmp2[[2]], tmp2[[3]])

  control <- map(tmp, filter, Group == "Control")
  walk(control, ~ points(jitter(.x[[2]] - 2), .x[[3]], col = "blue"))
  control %>%
    bind_rows() %>% 
    boxplot2(-eps, col = "blue")
  
  treatment <- map(tmp, filter, Group == "Treatment")
  walk(treatment, ~ points(jitter(.x[[2]] + 2), .x[[3]], col = "red"))
  treatment %>%
    bind_rows() %>% 
    boxplot2(eps, col = "red")
  
  mtext(gene)
  legend("topright", legend = c("treatment", "control"), fill = c("red", "blue"), bty = "n")
}

Let’s try it:

plot_gene_all_farms_boxplot(resistance_genes[1])

Mmmm… For some reason, interquartiles ranges look a bit weird on some of these boxplots. Not sure how this is calculated but I don’t really like it.

Violin plots

Let’s try a violin plot instead, by using this function:

vioplot2 <- function(x, eps, color, ...) {
  vioplot::vioplot(x[[3]] ~ x[[2]], at =  unique(x[[2]]) + eps, add = TRUE,
                   axes = FALSE, fill = color, lineCol = color, border = color,
                   wex = 4, col = adjustcolor(color, .5), ...)
}

And the new plotting function:

plot_gene_all_farms_violin <- function(gene) {
  eps <- 1.5
  
  tmp <- genes_standardized %>% 
    map(filter, SamplingDay != "Before") %>% 
    map(extract, c("Group", "SamplingDay2", gene))

  tmp2 <- bind_rows(tmp)
  plot_frame2(tmp2[[2]], tmp2[[3]])

  control <- map(tmp, filter, Group == "Control")
  walk(control, ~ points(jitter(.x[[2]] - 2), .x[[3]], col = "blue"))
  control %>%
    bind_rows() %>% 
    vioplot2(-eps, "blue")
  
  treatment <- map(tmp, filter, Group == "Treatment")
  walk(treatment, ~ points(jitter(.x[[2]] + 2), .x[[3]], col = "red"))
  treatment %>%
    bind_rows() %>% 
    vioplot2(eps, "red")
  
  mtext(gene)
  legend("topright", legend = c("treatment", "control"), fill = c("red", "blue"), bty = "n")
}

Let’s try it too:

plot_gene_all_farms_violin(resistance_genes[1])

Not sure I like it either…

Paired treatment and control

Let’s plot paired treatment and control for each farm instead. For that, we need the following function:

plot_gene_all_farms_pairwise <- function(gene) {
  col_val <- adjustcolor(c("blue", "red"), .5)

  genes %>% 
    map(extract, c("SamplingDay2", "Group", gene)) %>% 
    map(pivot_wider, names_from = Group, values_from = 3) %>% 
    bind_rows() %>% 
    mutate(SamplingDay3 = jitter(SamplingDay2),
           color = (Treatment > Control) + 1) %>% 
    with({
      plot_frame2(rep(SamplingDay3, 2), c(Control, Treatment))
      points(SamplingDay3, Control, col = "blue")
      points(SamplingDay3, Treatment, col = "red")
      arrows(SamplingDay3, Control, SamplingDay3, Treatment, 0, col = col_val[color])
    })
  
  mtext(gene)
}

Let’s try it:

plot_gene_all_farms_pairwise(resistance_genes[1])

Differences between treatment and control

Experiment time series

Let’s plot the difference between treatment and control for each farm. For that, we need the following function:

plot_differences <- function(gene) {
  tmp <- genes %>% 
    map(extract, c("SamplingDay2", "Group", gene)) %>% 
    map(pivot_wider, names_from = Group, values_from = 3) %>% 
    map(mutate, difference = Treatment - Control)

  tmp %>% 
    bind_rows() %$% 
    plot_frame2(SamplingDay2, difference)
  
  walk(tmp, ~ with(.x, lines(SamplingDay2, difference, col = "green", lwd = 2)))

  abline(h = 0)
  mtext(gene)
}

Let’s try it:

plot_differences(resistance_genes[1])

Boxplots

Let’s consider this function with boxplots:

plot_differences_boxplot <- function(gene) {
  
  tmp <- genes %>% 
    map(extract, c("SamplingDay2", "Group", gene)) %>% 
    map(pivot_wider, names_from = Group, values_from = 3) %>% 
    map(mutate, difference = Treatment - Control) %>% 
    bind_rows()

  with(tmp, plot(jitter(SamplingDay2), difference, col = "green",
                 xlab = "time (days)",
                 ylab = "difference in standardized genes concentrations"))
  
  tmp %>% 
    select(2, 1, 3) %>% 
    boxplot2(0, col = "green")

  abline(h = 0)
  mtext(gene)
}

Let’s try it:

plot_differences_boxplot(resistance_genes[1])

The boxplot still looks weird.

Violin plots

Let’s look at the violin alternative:

plot_differences_violin <- function(gene) {
  
  tmp <- genes %>% 
    map(extract, c("SamplingDay2", "Group", gene)) %>% 
    map(pivot_wider, names_from = Group, values_from = 3) %>% 
    map(mutate, difference = Treatment - Control) %>% 
    bind_rows()

  with(tmp, plot(jitter(SamplingDay2), difference, col = "green",
                 xlab = "time (days)",
                 ylab = "difference in standardized genes concentrations"))
  
  tmp %>% 
    select(2, 1, 3) %>% 
    vioplot2(0, col = "green")

  abline(h = 0)
  mtext(gene)
}

Let’s try it:

plot_differences_violin(resistance_genes[1])

Same comment.

Quantiles

Let’s consider another option.

plot_differences_quantiles <- function(gene) {
  tmp <- genes %>% 
    map(extract, c("SamplingDay2", "Group", gene)) %>% 
    map(pivot_wider, names_from = Group, values_from = 3) %>% 
    map(mutate, difference = Treatment - Control) %>% 
    bind_rows()

  with(tmp, plot(jitter(SamplingDay2), difference, col = "darkgrey",
                 xlab = "time (days)",
                 ylab = "difference in standardized genes concentrations"))

  x_val <- sort(unique(tmp$SamplingDay2))
  
  tmp %>%
    select(SamplingDay2, difference) %>% 
    group_by(SamplingDay2) %>% 
    group_split() %>% 
    map(~ quantile(.x$difference, c(.25, .5, .75), na.rm = TRUE)) %>% 
    bind_rows() %>% 
    mutate(x_val = x_val) %>% 
    with({
      points(x_val, `50%`, lwd = 2)
      arrows(x_val, `25%`, x_val, `75%`, .1, 90, 3, lwd = 2, col = "black")
      lines(x_val, `25%`, lty = 3)
      lines(x_val, `50%`, lty = 2)
      lines(x_val, `75%`, lty = 3)
    })
  
  abline(h = 0)
  mtext(gene)
}

Let’s try it:

plot_differences_quantiles(resistance_genes[1])